
**************************************************************
* CROWDFUNDING
* CLEANING AND PREPARATION OF DATASETS
* CODE BY: ERIK ANSINK
**************************************************************



**************************************************************
* OPEN EXPERIMENT DATABASE
**************************************************************

use "crowddata.dta", clear




**************************************************************
* TIDY UP
**************************************************************

* The listed string variables carry the literal text NULL where no value is
* available; recode it to "." so the numeric ones can be converted below
local nullcols NumberOfTokensBet BetDateTime Seconds TotalBetCountUntilNow ///
	TotalBetAmountUntilNow PaymentIfTresholdIsMet LoginID MinimumAmountToStart ///
	ProjectName
foreach col of varlist `nullcols' {
	replace `col' = "." if `col' == "NULL"
}

* Convert every string variable that holds only numbers to numeric
destring, replace

* Remove variables that are not used anywhere below
drop Reason NipoBaseHouseholdID LoginDateTime

* Group numbers repeat across treatments; make them unique by rebuilding them as
* experiment*1000 + treatment*100 + original group number
replace Group = Group + TreatmentID*100 + exp*1000

* Split the datetime variables, and convert everything to days, then seconds from experiment start (differs by treatment)
* Step 1: cut BetDateTime at blanks into BetDateTime1 and BetDateTime2
*         (used below as the date part and the time part respectively)
split BetDateTime
* Step 2: cut the time part at "." ; BetDateTime21 is the piece before it (later split on ":"),
*         BetDateTime22 the piece after it (dropped unused)
split BetDateTime2, p(.)
destring, replace
* Step 3: cut the date part at "-" ; the third piece (BetDateTime13) is used as the day of the month
split BetDateTime1, p(-)
destring, replace

* convert days of months to day 0,1,2,3 of the experiment using the following dates of each session:
	* exp 0 session 1: 29 oct /  1 nov 2015	treatments 3
	* exp 0 session 2: 19 nov / 22 nov 2015	treatments 5/6
	* exp 0 session 3: 26 nov / 29 nov 2015	treatments 4/7
	* exp 1 session 1: 11 may / 14 may 2017	treatments 3
	* exp 1 session 2: 18 may / 21 may 2017	treatments 5 
	* exp 1 session 3:  1 jun /  4 jun 2017	treatments 4
replace BetDateTime13 = BetDateTime13-29 if exp == 0 & TreatmentID == 3
* and since the last day is 1 november we have to map 1-29 = -28 to day 3
* (restricted to exp 0, the only session that crosses a month boundary)
replace BetDateTime13 = 3 if BetDateTime13 == -28 & exp == 0 & TreatmentID == 3
* The treatment alternatives are parenthesised: "&" binds tighter than "|", so without
* the parentheses the exp == 0 restriction would only apply to the first treatment
replace BetDateTime13 = BetDateTime13-19 if exp == 0 & (TreatmentID == 5 | TreatmentID == 6)
replace BetDateTime13 = BetDateTime13-26 if exp == 0 & (TreatmentID == 4 | TreatmentID == 7)
replace BetDateTime13 = BetDateTime13-11 if exp == 1 & TreatmentID == 3
replace BetDateTime13 = BetDateTime13-18 if exp == 1 & TreatmentID == 5
replace BetDateTime13 = BetDateTime13-1  if exp == 1 & TreatmentID == 4
rename BetDateTime13 BetDay
rename BetDateTime21 BetTime
* Cut the time at ":" into BetTime1, BetTime2, BetTime3 (used below as hours, minutes, seconds)
split BetTime, p(:)
destring, replace
* Subtract 8 hours from the first day since we start at 8AM
gen SecondsFromStart = BetDay*24*60*60 + (BetTime1-8)*60*60 + BetTime2*60 + BetTime3
rename Seconds SecondsFromLogin
* Remove the raw datetime and all intermediate pieces; BetDay and BetTime are kept
drop BetDateTime BetDateTime1 BetDateTime2 BetDateTime11 BetDateTime12 BetDateTime22 BetTime1 BetTime2 BetTime3


* Give every variable its final name and attach its label straight away
* (exp keeps its name and only receives a label)
label variable exp "Experiment (0=core, 1=follow-up)"

rename ParticipantID participantID
label variable participantID "Participant ID"

rename MinimumAmountToStart threshold
label variable threshold "Project threshold"

rename PaymentIfTresholdIsMet bonus
label variable bonus "Project bonus"

rename TotalBetAmountUntilNow totalcontributionamount
label variable totalcontributionamount "Contribution level up to now"

rename TotalBetCountUntilNow totalcontributioncount
label variable totalcontributioncount "Contribution count up to now"

rename LoginID loginID
label variable loginID "Login ID"

rename LoginDays logindays
label variable logindays "Number of days with login"

rename SecondsFromStart secfromstart
label variable secfromstart "Contribution made at # seconds from start experiment"

rename SecondsFromLogin secfromlogin
label variable secfromlogin "Contribution made at # seconds from login"

rename TreatmentID treatmentID
label variable treatmentID "Treatment ID"

rename NumberOfTokensBet contribution
label variable contribution "Number of tokens contributed"

rename Group group
label variable group "Group number: 1st digit = exp, 2nd digit = treatmentID"

rename BetTime contributiontime
label variable contributiontime "Time of contribution"

rename BetDay contributionday
label variable contributionday "Day of contribution"

* Project identifier and description
* ProjectName is cut at blanks; its second piece is converted to a number and used as the project ID
split ProjectName
destring, replace
gen byte projectID = ProjectName2
label variable projectID "Project ID"
drop ProjectName ProjectName1 ProjectName2
* The description is the text "threshold-bonus"
egen projectdescription = concat(threshold bonus), punct(-)
label variable projectdescription "Project description"

* Readable treatment names; any treatment number other than 3-7 gets an empty string
gen treatmentdescription = cond(treatmentID == 3, "BEN", ///
	cond(treatmentID == 4, "SEED72", ///
	cond(treatmentID == 5, "SIA72", ///
	cond(treatmentID == 6, "SIX", ///
	cond(treatmentID == 7, "SIXSEED78", "")))))
label variable treatmentdescription "Treatment description"

* Arrange the columns; the commands are applied one after another, so their sequence matters
order exp group, after(participantID)
order treatmentdescription, after(treatmentID)
order projectID projectdescription threshold bonus, after(treatmentdescription)
order contributionday contributiontime, after(loginID)
order secfromlogin secfromstart, after(contributiontime)
order contribution, after(loginID)

* Generate a dummy for one type of irrational behavior: contribution over the threshold
* The dummy is 1 when a positive contribution plus the running total exceeds the threshold, else 0.
* Both contribution and totalcontributionamount must be non-missing: Stata treats a missing
* value as larger than any number, so a missing running total would otherwise make
* "contribution + totalcontributionamount > threshold" true and flag the row by accident.
gen overcontribution = contribution > 0 & !missing(contribution, totalcontributionamount) & contribution + totalcontributionamount > threshold
label variable overcontribution "Irrational: contribution over threshold"

* Drop all subjects that did not login once
drop if loginID ==.

* Store the cleaned login-level data
save tempdata/login.dta, replace






**************************************************************
* CREATE PARTICIPANT*PROJECT-LEVEL DATABASE  
**************************************************************

* Generate dummies to count number of subjects per group as well as number of logins (further below)
* tag() marks exactly one row per distinct combination with 1 and all other rows with 0
egen oneperparticipantlogin = tag(participantID loginID)
egen oneperparticipantgroup = tag(participantID group)
egen subjectspergroup = total(oneperparticipantgroup), by(group)
label variable subjectspergroup "Subjects per group"
* Generate early contributions
* Each total is only filled in on rows that satisfy the if-condition and is missing elsewhere;
* the (max) in the collapse below carries the value to the participant*project level.
* participantID is written out in full so the commands do not depend on variable abbreviation.
egen cont1day = total(contribution) if contributionday==0, by(participantID projectID)
egen cont2days = total(contribution) if contributionday<2, by(participantID projectID)
egen cont5hrs = total(contribution) if secfromstart<18000, by(participantID projectID)
egen cont10hrs = total(contribution) if secfromstart<36000, by(participantID projectID)
egen cont15hrs = total(contribution) if secfromstart<54000, by(participantID projectID)
* Generate early contributions, disjoint intervals
egen cont10hrsa = total(contribution) if secfromstart<36000 & secfromstart>17999, by(participantID projectID)
egen cont15hrsa = total(contribution) if secfromstart<54000 & secfromstart>35999, by(participantID projectID)
label variable cont1day "Number of tokens contributed on day 1"
label variable cont2days "Number of tokens contributed on days 1-2"
label variable cont5hrs "Number of tokens contributed in hour 1-5"
label variable cont10hrs "Number of tokens contributed in hour 1-10"
label variable cont15hrs "Number of tokens contributed in hour 1-15"
label variable cont10hrsa "Number of tokens contributed in hour 5-10"
label variable cont15hrsa "Number of tokens contributed in hour 10-15"


sort participantID projectID loginID

* Collapse to participant*project level
* collapse replaces variable labels, so remember each label in a local macro first;
* a variable without a label is remembered under its own name
foreach var of varlist _all {
	local l`var' : variable label `var'
	if "`l`var''" == "" local l`var' "`var'"
}

collapse (first) logindays exp treatmentID treatmentdescription projectdescription ///
		group threshold bonus subjectspergroup ///
	(min) firstcontributionsecs=secfromstart ///
	(count) logins=oneperparticipantlogin contributionscount=contribution ///
	(sum) contributionssum=contribution ///
	(max) cont1day cont2days cont5hrs cont10hrs cont10hrsa cont15hrs cont15hrsa overcontribution, ///
	by(participantID projectID)

* Put the remembered labels back on the surviving variables
foreach var of varlist _all {
	label variable `var' "`l`var''"
}

* Labels for the variables that were newly created by the collapse
label variable firstcontributionsecs "Contribution made at # seconds from start experiment"
label variable logins "Number of logins"
label variable contributionscount "Number of contributions made"
label variable contributionssum "Sum of contributions made"

* Generate a dummy for one type of irrational behavior: no deviation constraint
* could = 1 when the tokens a participant still had available would have been enough
* to close the remaining gap between the group total and the project threshold
sort participantID
bysort participantID: egen totalcontributionspp = total(contributionssum)
* Tokens left out of 34 after all of the participant's contributions
gen remainingtokens = 34-totalcontributionspp
* At most (bonus - own contribution) more tokens are counted for this project
gen remainingtokenspp = min(bonus-contributionssum,remainingtokens)
sort group projectID
bysort group projectID: egen tbppg = total(contributionssum)
* The seeded projects under SEED72 (treatment 4, project 1) and SIXSEED78 (treatment 7,
* project 5) start with 20 seed tokens, the same adjustment that is made when
* projectsuccess is generated at the group*project level; count them towards the threshold
gen seedtokens = 20*((treatmentID==4 & projectID==1) | (treatmentID==7 & projectID==5))
* gap stays missing when the threshold has already been reached
gen gap = threshold - seedtokens - tbppg if threshold - seedtokens - tbppg>0
* A missing gap compares as larger than any number, so those rows are coded 0
gen could = gap <= remainingtokenspp
label variable could "Irrational: no deviation constraint"
drop remainingtokens remainingtokenspp tbppg gap totalcontributionspp seedtokens

* Generate gini coefficient
* requires the user-written SSC package "egen_inequal", which provides the egen function inequal()
* (stored as _ginequal.ado). Install it only when it is not yet on the ado-path, so the do-file
* does not need network access on every run.
capture which _ginequal
if _rc != 0 {
	ssc install egen_inequal
}
egen gini=inequal(contributionssum), by(group projectID) 
label variable gini "Gini coefficient by group*project"

save tempdata/participantproject.dta, replace







**************************************************************
* MERGE EXPERIMENT WITH SURVEY DATA
* TO CREATE PARTICIPANT-LEVEL DATABASE  
**************************************************************

preserve

* Generate payoffs per project
egen groupcontributions = total(contributionssum), by(group projectID)
gen overinvestment = groupcontributions - threshold
* Add the 20 seed tokens to the seeded projects under SEED72 and SIXSEED78, exactly as is
* done when projectsuccess is generated at the group*project level; otherwise a project
* that reached its threshold only thanks to the seed would pay out nothing here
replace overinvestment = overinvestment+20 if treatmentID==4 & projectID==1
replace overinvestment = overinvestment+20 if treatmentID==7 & projectID==5
* Missing values compare as larger than any number, so exclude them explicitly
gen projectsuccess = 1 if overinvestment >=0 & !missing(overinvestment)
* projectsuccess is missing for failed projects, which makes projectpayoff missing as well;
* the (sum) in the collapse below treats those as zero
gen projectpayoff = projectsuccess*(bonus-contributionssum)
drop groupcontributions overinvestment projectsuccess
label variable projectpayoff "Per-project payoff"

* Generate a dummy for one type of irrational behavior: investing more than the bonus level
* (coded 1 when the participant's contributions to the project exceed its bonus, else 0)
gen morethanbonus = contributionssum>bonus
label variable morethanbonus "Irrational: contributing more than bonus"

* Collapse to participant level
* collapse replaces variable labels, so remember each label in a local macro first;
* a variable without a label is remembered under its own name
foreach var of varlist _all {
	local l`var' : variable label `var'
	if "`l`var''" == "" local l`var' "`var'"
}

* NOTE(review): (first) keeps the value of whichever project row comes first for a participant;
* for variables that differ between a participant's projects the result depends on row order - confirm this is intended
collapse (first) exp treatmentID treatmentdescription group threshold bonus ///
		subjectspergroup firstcontributionsecs logins logindays contributionscount ///
	(sum) projectcontributionsall=contributionssum projectpayoffall=projectpayoff ///
	(max) morethanbonus could overcontribution, ///
	by(participantID)

* Put the remembered labels back on the surviving variables
foreach var of varlist _all {
	label variable `var' "`l`var''"
}

* Labels for the two totals created by the collapse
label variable projectpayoffall "Payoff from all projects"
label variable projectcontributionsall "Contributions to all projects"


* Participant payoff: 34 tokens plus the summed per-project payoffs
gen participantpayoff = projectpayoffall + 34
label variable participantpayoff "Participant payoff"

* Keep only subjects with at least 4 login days
drop if logindays<4

* Attach the survey answers; only participants present in both files are kept
merge 1:1 participantID using surveydata.dta, keep(match) nogenerate

* Survey variables: drop what is irrelevant or empty, then build, rename and label the rest
drop WAVE DATE D0542S3 GEZINSCYCLUS TYPEHUIS POSTCODE GEMGROOTTE HKW_GSL HKW_LFT HKW_OPL_VOLT V21002 V21007_2 V21007_3 V21005_2 V21005_3
* Row totals over the remaining V21007_* and V21005_* items, after which the items are removed
egen member = rowtotal(V21007_*)
egen donate = rowtotal(V21005_*)
drop V21007_* V21005_*
label variable member "\# Memberships"
label variable donate "\# Donations"

* Background characteristics
rename INKOMENLANG income
label variable income "Household income (20 Category-scale)"
rename SOCIALEKLASSE socialclass
rename GSL female
* Shift the gender code down by one to obtain the 0/1 coding named in the label
replace female=female-1
label variable female "Gender (Male=0, Female=1)"
rename LFT age
label variable age "Age (Yrs)"
rename OPL_VOLT edu
label variable edu "Education (8 Category-scale)"

* Risk aversion and inequality aversion items
rename V10 riskaversionWTP
rename V20 riskaversionWTA
forvalues i = 1/6 {
	rename V30_`i' inequalityaversion`i'
}

* Statements about the person
forvalues i = 1/3 {
	rename V40_`i' St_persoon_`i'
}
label variable St_persoon_1 "I give to society (1-5 Likert scale)"
label variable St_persoon_2 "I put family first (1-5 Likert scale)"
label variable St_persoon_3 "I find money decisions hard (1-5 Likert scale)"

* Statements about goals in the game
forvalues i = 1/4 {
	rename V50_`i' St_spel_`i'
}
label variable St_spel_1 "Goal: Fair play (1-5 Likert scale)"
label variable St_spel_2 "Goal: Earn tokens (1-5 Likert scale)"
label variable St_spel_3 "Goal: Do as others (1-5 Likert scale)"
label variable St_spel_4 "Goal: Two projects (1-5 Likert scale)"

* Questions about the game itself
rename V60 V_spel_1
rename V70 V_spel_2
rename V80 V_spel_3
* Shift the answer code down by one to obtain the 0/1 coding named in the label
replace V_spel_3=V_spel_3-1
label variable V_spel_1 "Game: Instructions not clear (1-5 Likert scale)"
label variable V_spel_2 "Game: Easy to play (1-4 Likert scale)"
label variable V_spel_3 "Game: Wanted to play more (No=0, Yes=1)"

save tempdata/participant.dta, replace

* Return to the data as they were at the matching preserve
restore







**************************************************************
* CREATE GROUP*PROJECT-LEVEL DATABASE  
**************************************************************

sort group projectID firstcontributionsecs

* Collapse to group*project level, but first save variable labels
* (a variable without a label is remembered under its own name)
foreach v of var * {
	local l`v' : variable label `v'
	if "`l`v''" == "" {
	local l`v' "`v'"
	}
}

collapse (first) exp treatmentID treatmentdescription projectdescription threshold bonus subjectspergroup (min) firstcontributionsecs gini (sum) groupcontributions=contributionssum cont1day cont2days cont5hrs cont10hrs cont10hrsa cont15hrs cont15hrsa (max) could, by(group projectID)

* Attach the saved labels
foreach v of var * {
	label var `v' "`l`v''"
}

label variable groupcontributions "Group contributions to all projects"

* Generate the projectsuccess variable; to do so add 20 seed tokens to the seeded projects under SEED72 and SIXSEED78
gen overinvestment = groupcontributions - threshold
replace overinvestment = overinvestment+20 if treatmentID==4 & projectID==1
replace overinvestment = overinvestment+20 if treatmentID==7 & projectID==5
* projectsuccess is 1 when the threshold is reached and missing otherwise.
* Missing values compare as larger than any number, so a missing overinvestment
* is excluded explicitly instead of being counted as a success.
gen projectsuccess = 1 if overinvestment >=0 & !missing(overinvestment)

label variable overinvestment "Group over-investment in a project"
label variable projectsuccess "Dummy=1 if group reaches project threshold"

* Drop small groups with less than 5 players
drop if subjectspergroup<5

sort group projectID
save tempdata/groupproject.dta, replace






**************************************************************
* CREATE TREATMENT*PROJECT-LEVEL DATABASE  
**************************************************************


preserve

* Failed projects carry a missing projectsuccess; code them 0 before aggregating
replace projectsuccess=0 if projectsuccess==.

* Collapse to treatment*project level
* collapse replaces variable labels, so remember each label in a local macro first;
* a variable without a label is remembered under its own name
foreach var of varlist _all {
	local l`var' : variable label `var'
	if "`l`var''" == "" local l`var' "`var'"
}

collapse (first) treatmentdescription projectdescription threshold bonus ///
	(count) groupspertreatment=group ///
	(sum) treatmentprojectcontributions=groupcontributions treatmentprojectsuccess=projectsuccess, ///
	by(exp projectID treatmentID)
sort exp treatmentID projectID

* Put the remembered labels back on the surviving variables
foreach var of varlist _all {
	label variable `var' "`l`var''"
}

* Labels for the variables created by the collapse
label variable groupspertreatment "Groups per treatment"
label variable treatmentprojectcontributions "Treatment project contributions"
label variable treatmentprojectsuccess "Treatment project success"

* Per-group averages: totals divided by the number of groups in the cell
gen avgtreatmentprojectcontributions = treatmentprojectcontributions / groupspertreatment
label variable avgtreatmentprojectcontributions "Treatment project contributions (average)"
gen avgtreatmentprojectsuccess = treatmentprojectsuccess / groupspertreatment
label variable avgtreatmentprojectsuccess "Treatment project success (average)"

save tempdata/treatmentproject.dta, replace
* Return to the group*project data as they were at the matching preserve
restore





**************************************************************
* CREATE GROUP-LEVEL DATABASE  
**************************************************************


* Project-specific copies of contributions and success for projects 1, 2 and 5;
* each copy is missing on the rows of the other projects
foreach p in 1 2 5 {
	gen totalcontributionsproject`p' = groupcontributions if projectID == `p'
}
foreach p in 1 2 5 {
	gen projectsuccess`p' = projectsuccess if projectID == `p'
}

* Collapse to group level
* collapse replaces variable labels, so remember each label in a local macro first;
* a variable without a label is remembered under its own name
foreach var of varlist _all {
	local l`var' : variable label `var'
	if "`l`var''" == "" local l`var' "`var'"
}

collapse (first) exp treatmentID treatmentdescription subjectspergroup ///
	(min) firstcontributionsecs ///
	(sum) totalcontributionspergroup=groupcontributions ///
		totalcontributionsproject1 totalcontributionsproject2 totalcontributionsproject5 ///
		totalprojectsuccesspergroup=projectsuccess ///
		projectsuccess1 projectsuccess2 projectsuccess5, ///
	by(group)

* Put the remembered labels back on the surviving variables
foreach var of varlist _all {
	label variable `var' "`l`var''"
}

sort group

* Projects 1 and 2 combined
gen projectsuccess12 = projectsuccess1 + projectsuccess2
gen totalcontributionsprojects12 = totalcontributionsproject1 + totalcontributionsproject2

* Target project 72 is project 1, target project 78 is project 5
local proj72 1
local proj78 5

* Difference between the target project and all other projects of the group
foreach thr in 72 78 {
	local p `proj`thr''
	gen contributiondiff`thr' = totalcontributionsproject`p' - (totalcontributionspergroup - totalcontributionsproject`p')
	gen thresholddiff`thr' = projectsuccess`p' - (totalprojectsuccesspergroup - projectsuccess`p')
}

* Ratio of the target project to all other projects of the group
foreach thr in 72 78 {
	local p `proj`thr''
	gen contributionratio`thr' = totalcontributionsproject`p' / (totalcontributionspergroup - totalcontributionsproject`p')
	gen thresholdratio`thr' = projectsuccess`p' / (totalprojectsuccesspergroup - projectsuccess`p')
}

* The project 2 copies were only needed for the combined variables
drop totalcontributionsproject2 projectsuccess2

* Labels
label variable totalcontributionspergroup "Total contributions per group"
label variable totalprojectsuccesspergroup "Total thresholds per group"
label variable projectsuccess12 "BEN projects thresholds reached"
label variable totalcontributionsprojects12 "BEN projects contributions made"
foreach thr in 72 78 {
	local p `proj`thr''
	label variable totalcontributionsproject`p' "Contributions made target project `thr'"
	label variable projectsuccess`p' "Thresholds reached target project `thr'"
	label variable contributiondiff`thr' "Contribution difference target project `thr'"
	label variable thresholddiff`thr' "Threshold difference target project `thr'"
	label variable contributionratio`thr' "Contribution ratio target project `thr'"
	label variable thresholdratio`thr' "Threshold ratio target project `thr'"
}

save tempdata/group.dta, replace
